# IMPORTANT!!! THIS ONLY WORKS ON PY 2.7.2 AND NUMPY 1.6.1!

import editdist
import sys
import csv

# Number of leading bases of read 1 (and of each TA) that are compared.
TAG_PREFIX_LEN = 16
# Largest distance between the two prefixes that still counts as a match.
MAX_TAG_DISTANCE = 3
# A progress counter is printed once every this many read pairs.
PROGRESS_INTERVAL = 10000


def _read_record(handle):
    """Return the next four lines of ``handle`` (one fastq record) as a tuple.

    Lines keep their trailing newline. Past the end of the file every
    element is the empty string, because that is what ``readline`` returns.
    """
    return (
        handle.readline(),
        handle.readline(),
        handle.readline(),
        handle.readline(),
    )


def main(argv, distance=None):
    """Split a pair of fastq files into one pair of fastq files per TA.

    Args:
        argv: argument list laid out like ``sys.argv``:
            ``[program, TA file, output directory, read 1 fastq, read 2 fastq]``.
            Each non-blank line of the TA file is ``name sequence`` separated
            by whitespace.
        distance: callable taking two strings and returning their distance.
            Defaults to ``editdist.distance``.

    For every record of the read 1 file, the first ``TAG_PREFIX_LEN``
    characters of its sequence line are compared (case-insensitively) with
    the first ``TAG_PREFIX_LEN`` characters of each TA. The record and the
    record at the same position of the read 2 file are written to
    ``<output directory>/<name>_1.fastq`` and ``<name>_2.fastq`` of the first
    TA whose distance is at most ``MAX_TAG_DISTANCE``; pairs matching no TA
    go to ``junk1MIPseq.fastq`` / ``junk2MIPseq.fastq``.

    Returns:
        The number of read pairs processed.

    Raises:
        SystemExit: if fewer than four arguments follow the program name.
    """
    if len(argv) < 5:
        program = argv[0] if argv else "script"
        sys.exit("usage: %s TA_FILE OUT_DIR READ1_FASTQ READ2_FASTQ" % program)
    if distance is None:
        # Looked up only when needed so a caller-supplied metric works
        # without the editdist module.
        distance = editdist.distance
    ta_path, out_dir, fq1_path, fq2_path = argv[1:5]

    # Every handle opened below is recorded here so that the finally clause
    # can close (and therefore flush) all of them, even after an error.
    opened = []

    def _open(path, mode):
        handle = open(path, mode)
        opened.append(handle)
        return handle

    try:
        # name -> TA sequence
        TA_dict = {}
        # name -> open output file for read 1 / read 2
        F1T_dict = {}
        F2T_dict = {}
        for line in _open(ta_path, "r"):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no TA.
                continue
            name = fields[0]
            TA_dict[name] = fields[1]
            if name not in F1T_dict:
                # Open each output once; a repeated name only updates the
                # sequence instead of reopening and truncating the files.
                F1T_dict[name] = _open("%s/%s_1.fastq" % (out_dir, name), "w")
                F2T_dict[name] = _open("%s/%s_2.fastq" % (out_dir, name), "w")

        # The TA prefixes never change, so upper-case and slice them once
        # rather than once per read.
        tag_prefixes = [
            (name, TA[0:TAG_PREFIX_LEN].upper()) for name, TA in TA_dict.items()
        ]

        Fq1 = _open(fq1_path, "r")
        Fq2 = _open(fq2_path, "r")
        junk1 = _open("%s/junk1MIPseq.fastq" % out_dir, "w")
        junk2 = _open("%s/junk2MIPseq.fastq" % out_dir, "w")

        i = 0
        while True:
            record1 = _read_record(Fq1)
            if record1[0] == "":
                # End of the read 1 file.
                break
            # NOTE(review): if the read 2 file is shorter than the read 1
            # file, empty strings are written for the missing mate records.
            record2 = _read_record(Fq2)

            # Only read 1 is inspected; read 2 simply follows its mate.
            read_prefix = record1[1][0:TAG_PREFIX_LEN].upper()
            out1, out2 = junk1, junk2
            for name, ta_prefix in tag_prefixes:
                if distance(read_prefix, ta_prefix) <= MAX_TAG_DISTANCE:
                    out1, out2 = F1T_dict[name], F2T_dict[name]
                    break
            out1.writelines(record1)
            out2.writelines(record2)

            # Progress indicator.
            if i % PROGRESS_INTERVAL == 0:
                print(i)
            i += 1
        return i
    finally:
        for handle in opened:
            handle.close()


if __name__ == "__main__":
    main(sys.argv)